
** pa_violations_for_replication: Runs the main analysis on PA violations data and VA production/liability/violations data. Should be run second.

clear all

** Set major file path

global MASTER "C:\Users\tpeis\Dropbox\Coal Bonding"

** Standard Header Stuff
capture log close
set more off

************************************************************************************
************************************************************************************
set scheme stmono2


** Read in PA violations data from Arthur
import excel using "$MASTER\Data\Violations\PA_violations_noted.xlsx", firstrow clear

* Get inspection years
gen inspection_year = year(inspection_date)

* Get violation years - different format
gen violation_year = year(violation_date)

* Get executed years
gen executed_year = year(date_executed)

* Get dummies for all the major enforcement statuses
encode enforcement_status, gen(es_number)


gen forfeited = es_number==4
gen comply = es_number==3
gen unresolvable = es_number==8
gen superseded = es_number==6

* Get dummies for the major violation types
encode violation_type, gen(vt_number)

gen ehs = vt_number==2

gen ones = 1

replace program = strtrim(program)

* Drop minerals and non coal?
drop if program=="MING Industrial Minerals Regulatory"

* Drop quarries
drop if strpos(facility_name, "QUARRY")!=0
drop if strpos(facility_name, "QRY")!=0

* Drop refuse areas
drop if strpos(facility_name, "CRDA")!=0
drop if strpos(facility_name, "Refuse")!=0

* Collapse by facility and violation years
collapse (sum) ones penalty_amount ehs (max) forfeited comply unresolvable superseded, by(permit_number facility_name violation_year)

drop if permit_number==""

* Check dupes
duplicates list permit_number violation_year


* Rename variables for merging
gen year = violation_year
rename permit_number PermitNumber




rename ones num_violations

save "$MASTER\Data\Violations\PA_violations_cleaned.dta", replace


** Read in cleaned OSMRE/bonding
use "$MASTER\Data\Mine Level Production\PA\pa_for_violations.dta", clear

merge 1:1 PermitNumber year using "$MASTER\Data\Violations\PA_violations_cleaned.dta", keepusing(num_violations penalty_amount ehs forfeited comply unresolvable)

drop if _merge==2

* Assume missing violation data is 0
replace num_violations = 0 if num_violations==.
replace ehs = 0 if ehs==.

bysort permid: egen max_viol = max(num_violations)

gen in_viol = max_viol!=0 & max_viol!=.

gen viol_at_all = num_violations!=0 & num_violations!=.
gen ehs_at_all = ehs!=0 & ehs!=.

** Try to clock violations over time
bysort year underground: egen year_viol = sum(num_violations)



* Square mileage for each county: source usa.com
replace sqmi = .
replace sqmi = 730.07 if county=="Allegheny"
replace sqmi = 653.20 if county=="Armstrong"
replace sqmi = 434.71 if county=="Beaver"
replace sqmi = 1013.30 if county=="Bedford"
replace sqmi = 525.80 if county=="Blair"
replace sqmi = 788.60 if county=="Butler"
replace sqmi = 688.35 if county=="Cambria"
replace sqmi = 396.23 if county=="Cameron"
replace sqmi = 381.46 if county=="Carbon"
replace sqmi = 1109.92 if county=="Centre"
replace sqmi = 600.83 if county=="Clarion"
replace sqmi = 1144.72 if county=="Clearfield"
replace sqmi = 887.98 if county=="Clinton"
replace sqmi = 483.11 if county=="Columbia"
replace sqmi = 525.05 if county=="Dauphin"
replace sqmi = 827.36 if county=="Elk"
replace sqmi = 790.34 if county=="Fayette"
replace sqmi = 437.55 if county=="Fulton"
replace sqmi = 575.95 if county=="Greene"
replace sqmi = 874.64 if county=="Huntingdon"
replace sqmi = 827.03 if county=="Indiana"
replace sqmi = 652.43 if county=="Jefferson"
replace sqmi = 459.08 if county=="Lackawanna"
replace sqmi = 458.37 if county=="Northumberland"
replace sqmi = 778.63 if county=="Schuylkill"
replace sqmi = 1074.37 if county=="Somerset"
replace sqmi = 449.94 if county=="Sullivan"
replace sqmi = 1133.79 if county=="Tioga"
replace sqmi = 674.28 if county=="Venango"
replace sqmi = 856.99 if county=="Washington"
replace sqmi = 1027.55 if county=="Westmoreland"


* Log population
replace popdens = pop/sqmi



* Get violations per production
gen viol_per_prod = num_violations/GrossTonsTonsbeforemoisture
gen ln_vpp = ln(viol_per_prod)
gen ln_vpp_p1 = ln(viol_per_prod + 1)
gen ln_viol = ln(num_violations)
gen ln_viol_p1 = ln(1 + num_violations)


* Match on pop density
cem lct popdens if year==2000 & surface==1, treatment(bituminous) k2k
drop cem_matched_dens_surf
bysort permid: egen cem_matched_dens_surf = max(cem_matched) if surface==1

drop cem_matched

* Mine level
bysort mine_id year: egen mine_viol = sum(num_violations)
gen mine_vaa = mine_viol!=0 & mine_viol!=.

gen ln_viol_p1_mine = ln(1 + mine_viol)

** Try penalty amounts
gen lpa = ln(penalty_amount)

* Try log violations
gen log_viol = ln(num_violations)


*** Virginia Comparisons


** Stack with VA
gen state = "PA"
append using "C:\Users\tpeis\Dropbox\Coal Bonding\Data\Violations\Virginia\va_to_stack_pa.dta"

* Basic analysis just looking at liability
gen pa = state=="PA"
gen p01 = pa*(year>=2001)
gen p05 = pa*(year>=2005)

replace ln_tons = log_tons if state=="PA"



* Fix age with VA
drop age ln_age min_year

bysort PermitNumber: egen min_year = min(year)
gen age = year - min_year + 1
gen ln_age = ln(age)


* Generate matching for VA
* pre and post, VA and PA, surface vs underground
replace surface=0 if state=="VA" & underground==1
replace surface=1 if state=="VA" & underground==0


* Get sqmi for VA
replace sqmi = 502.76 if county=="BUCHANAN" & state=="VA"
replace sqmi = 502.76 if county=="BUCHANAN, TAZEWELL" & state=="VA"
replace sqmi = 330.53 if (county=="DICKENSON" | county=="DICKENSON, WISE") & state=="VA"
replace sqmi = 435.52 if county=="LEE" & state=="VA"
replace sqmi = 473.82 if (county=="RUSSELL" | county=="RUSSELL, DICKENSON") & state=="VA"
replace sqmi = 535.53 if county=="SCOTT" & state=="VA"
replace sqmi = 518.85 if county=="TAZEWELL" & state=="VA"
replace sqmi = 403.19 if county=="WISE" & state=="VA"

replace popdens = pop/sqmi



















** Table 6
* Column 1
reghdfe bond_miss_prod_0 p01 pa ln_age ln_pop if bituminous==1 & underground==0 & year>=2000 & year<=2010, absorb(year) cluster(PermitNumber)

* Column 2
reghdfe bond_miss_prod_0 p01 ln_age ln_pop if bituminous==1 & underground==0 & year>=2000 & year<=2010, absorb(year PermitNumber) cluster(PermitNumber)

* Generate relevant doses for PA vs VA
bysort PermitNumber treatment_01: egen avg_liability_pre_post_va = mean(lct) if year>=1996

gen avg_liability_post_va = avg_liability_pre_post_va if treatment_01==1
bysort permid: egen avg_liability_post_full_va = max(avg_liability_post_va)

gen avg_liability_pre_va = avg_liability_pre_post_va if treatment_01==0
bysort permid: egen avg_liability_pre_full_va = max(avg_liability_pre_va)

gen dose_va = avg_liability_pre_full_va*bituminous*treatment_01*pa

* Column 3
reghdfe bond_miss_prod_0 dose_va pa ln_age ln_pop if bituminous==1 & underground==0 & year>=2000 & year<=2010, absorb(year) cluster(PermitNumber)

* Column 4
reghdfe bond_miss_prod_0 dose_va ln_age ln_pop if bituminous==1 & underground==0 & year>=2000 & year<=2010, absorb(year PermitNumber) cluster(PermitNumber)


* Matching for PA vs VA
* Pop Matching
cem lct ln_pop if year==2000 & surface==1, treatment(pa)

bysort PermitNumber: egen cem_matched_va_surface = max(cem_matched) if surface==1

drop cem_matched

* Density Matching
cem lct popdens if year==2000 & surface==1, treatment(pa)

bysort PermitNumber: egen cem_matched_va_surface_dens = max(cem_matched) if surface==1

drop cem_matched

* Column 5
reghdfe bond_miss_prod_0 p01 ln_age ln_pop if bituminous==1 & surface==1 & year>=2000 & year<=2010 & cem_matched_va_surface_dens==1, absorb(year PermitNumber) cluster(PermitNumber)

* Column 6
reghdfe bond_miss_prod_0 dose_va ln_age ln_pop if bituminous==1 & underground==0 & year>=2000 & year<=2010 & cem_matched_va_surface==1, absorb(year PermitNumber) cluster(PermitNumber)



** Table 8

* Try ppml
* Column 1
ppmlhdfe num_violations dose ln_age lppi bituminous ln_pop if year>=1996 & year<=2014 & surface==1 & cem_matched_full==1, absorb(year permid) cluster(permid)

* Column 2
ppmlhdfe num_violations dose ln_age lppi ln_pop if year>=1996 & year<=2014 & surface==1 & cem_matched_full==1, absorb(year permid) cluster(permid)

* Column 3
ppmlhdfe num_violations dose ln_age lppi ln_pop if year>=1996 & year<=2014 & surface==1, absorb(year permid) cluster(permid)

* Column 4
ppmlhdfe num_violations b01 ln_age lppi bituminous ln_pop if year>=1996 & year<=2014 & surface==1 & cem_matched_full==1, absorb(year permid) cluster(permid)

* Column 5
ppmlhdfe num_violations b01 ln_age lppi ln_pop if year>=1996 & year<=2014 & surface==1 & cem_matched_full==1, absorb(year permid) cluster(permid)

* Column 6
ppmlhdfe num_violations b01 ln_age lppi ln_pop if year>=1996 & year<=2014 & surface==1, absorb(year permid) cluster(permid)




** Table 9

* Column 1
ppmlhdfe num_violations p01 pa ln_age ln_pop if year>=1996 & year<=2014 & underground==0 & bituminous==1, absorb(year) cluster(PermitNumber)

* Column 2
ppmlhdfe num_violations p01 pa ln_age ln_pop if year>=1996 & year<=2014 & underground==0 & bituminous==1, absorb(year PermitNumber) cluster(PermitNumber)

* Column 3
ppmlhdfe num_violations p01 pa ln_age ln_pop if year>=1996 & year<=2014 & underground==0 & bituminous==1 & cem_matched_va_surface==1, absorb(year PermitNumber) cluster(PermitNumber)

* Column 4
ppmlhdfe num_violations dose_va pa ln_age ln_pop if year>=1996 & year<=2014 & underground==0 & bituminous==1, absorb(year) cluster(PermitNumber)

* Column 5
ppmlhdfe num_violations dose_va ln_age ln_pop if year>=1996 & year<=2014 & underground==0 & bituminous==1, absorb(year PermitNumber) cluster(PermitNumber)

* Column 6
ppmlhdfe num_violations dose_va pa ln_age ln_pop if year>=1996 & year<=2014 & underground==0 & bituminous==1 & cem_matched_va_surface==1, absorb(year PermitNumber) cluster(PermitNumber)




** Table 10
gen viol_at_all_sample = viol_at_all*(year>=1996)*(year<=2014)
drop ever_viol
bysort permid: egen ever_viol = max(viol_at_all_sample)

* Column 1
reghdfe bond_miss_prod_0 b01 ln_age lppi ln_pop if year>=1996 & year<=2014 & surface==1 & cem_matched_full==1 & viol_at_all==0, absorb(year permid) cluster(permid)

* Column 2
reghdfe bond_miss_prod_0 b01 ln_age lppi ln_pop if year>=1996 & year<=2014 & surface==1 & cem_matched_full==1 & viol_at_all==1, absorb(year permid) cluster(permid)

* Using "ever violated" instead of contemporary status (in footnote).
reghdfe bond_miss_prod_0 b01 ln_age lppi ln_pop if year>=1996 & year<=2014 & surface==1 & cem_matched_full==1 & ever_viol==0, absorb(year permid) cluster(permid)
reghdfe bond_miss_prod_0 b01 ln_age lppi ln_pop if year>=1996 & year<=2014 & surface==1 & cem_matched_full==1 & ever_viol==1, absorb(year permid) cluster(permid)



** Table 11 Row 2
ppmlhdfe num_violations dose ln_age lppi ln_pop if year>=1996 & year<=2014 & surface==1 & cem_matched_full==1, absorb(year permid) cluster(permid)
ppmlhdfe num_violations dose ln_age lppi ln_pop if year>=1996 & year<=2014 & surface==1 & cem_matched_full_96==1, absorb(year permid) cluster(permid)

** Table 13

* Regenerate "both types" variables for VA
drop max_firm_surface max_firm_underground

bysort company_id: egen max_firm_surface = max(surface)
bysort company_id: egen max_firm_underground= max(underground)

gen mfu_va = max_firm_underground
replace mfu_va = 0 if state=="VA"

gen mfs_va = max_firm_surface
replace mfs_va = 0 if state=="VA"


* Row 5
ppmlhdfe num_violations dose ln_age lppi bituminous ln_pop if year>=1996 & year<=2014 & surface==1 & cem_matched_full==1 & max_firm_underground==0, absorb(year permid) cluster(permid)

* Row 6
reghdfe bond_miss_prod_0 p01 pa ln_age ln_pop if bituminous==1 & underground==0 & year>=2000 & year<=2010 & mfu_va==0, absorb(year) cluster(PermitNumber)




** Table 14
* Row 4
teffects nnmatch (ln_viol_p1 ln_age lppi ln_pop max_prod year treatment_01) (b01) if surface==1 & year>=1996 & year<=2014, dmv vce(robust) nneighbor(1) atet biasadj(ln_age lppi ln_pop max_prod)


** Table 15
* Row 3
ppmlhdfe num_violations dose ln_age lppi ln_pop if year>=1996 & year<=2010 & surface==1 & cem_matched_full==1, absorb(year permid) cluster(permid)

** Table 16
* Row 3
* Density
ppmlhdfe num_violations dose ln_age lppi bituminous ln_pop if year>=1996 & year<=2014 & surface==1 & cem_matched_dens_surf==1, absorb(year permid) cluster(permid)
ppmlhdfe num_violations b01 ln_age lppi bituminous ln_pop if year>=1996 & year<=2014 & surface==1 & cem_matched_dens_surf==1, absorb(year permid) cluster(permid)



*** Appendix

** Table 19

* Column 1
reghdfe ln_viol_p1 b05 ln_age lppi ln_pop bituminous if year>=1996 & year<=2018 & surface==0, absorb(year) cluster(mine_id)

* Column 2
reghdfe ln_viol_p1 b05 ln_age lppi ln_pop if year>=1996 & year<=2018 & surface==0, absorb(year permid) cluster(mine_id)

* Column 3
reghdfe ln_viol_p1 b05 ln_age lppi if year>=1996 & year<=2018 & surface==0, absorb(year permid) cluster(mine_id)

* Column 4
reghdfe ln_viol_p1 b05 ln_age lppi ln_pop bituminous if year>=1996 & year<=2014 & surface==0, absorb(year) cluster(mine_id)

* Column 5
reghdfe ln_viol_p1 b05 ln_age lppi ln_pop if year>=1996 & year<=2014 & surface==0, absorb(year permid) cluster(mine_id)

* Column 6
reghdfe ln_viol_p1 b05 ln_age lppi if year>=1996 & year<=2014 & surface==0, absorb(year permid) cluster(mine_id)


** Table 20

* Column 1
reghdfe bond_miss_prod_0 b05 ln_age lppi ln_pop if year>=1996 & year<=2014 & surface==0 & viol_at_all==0, absorb(year permid) cluster(permid)

* Column 2
reghdfe bond_miss_prod_0 b01 ln_age lppi ln_pop if year>=1996 & year<=2014 & surface==0 & viol_at_all==1, absorb(year permid) cluster(permid)

* Using "ever violated" instead of contemporary status (in footnote).
reghdfe bond_miss_prod_0 b01 ln_age lppi ln_pop if year>=1996 & year<=2014 & surface==1  & ever_viol==0, absorb(year permid) cluster(permid)
reghdfe bond_miss_prod_0 b01 ln_age lppi ln_pop if year>=1996 & year<=2014 & surface==1  & ever_viol==1, absorb(year permid) cluster(permid)



** Figure 7

bysort year bituminous underground: egen yearly_violators = sum(viol_at_all)
bysort year bituminous underground: egen yearly_active_violators = sum(viol_at_all) if log_tons!=.

gen viol_pct = yearly_violators/yearly_count
gen viol_pct_active = yearly_active_violators/yearly_count

sort year

twoway (connected viol_pct year if year>=1996 & surface==1 & bituminous==1 & year<=2018 & cem_matched_full==1) (connected viol_pct year if year>=1996 & surface==1 & bituminous==0 & cem_matched_full==1 & year<=2018), xtitle("Year") ytitle("Proportion of Surface Mines with Violations") legend(lab(1 "Bituminous") lab(2 "Anthracite")) ///
saving(prod_total, replace)
graph export "$MASTER\Tables\pa_viol_surface.pdf", replace

twoway (connected viol_pct year if year>=1996 & surface==0 & bituminous==1 & year<=2018) (connected viol_pct year if year>=1996 & surface==0 & bituminous==0 & year<=2018), xtitle("Year") ytitle("Proportion of Underground Mines with Violations") legend(lab(1 "Bituminous") lab(2 "Anthracite")) ///
saving(prod_total, replace)
graph export "$MASTER\Tables\pa_viol_underground.pdf", replace


* Figure 10

** PPML Event study
ppmlhdfe num_violations c.dose_es##ib2000.year ln_age ln_pop if year>=1996 & year<=2014 & cem_matched_dens_surf==1, absorb(year permid) cluster(permid)
label variable year "Year"
coefplot, keep(*c.dose_es) nolabels coeflabels(, truncate(4)) baselevels omitted vertical xline(5) yline(0) xlabel(, alternate angle(90) labsize(small)) xtitle("Year") ytitle("Coefficient Value")

graph export "$MASTER\Tables\pa_viol_es_01_ppml.pdf", replace


** Appendix

** Figure 13
eventdd ln_viol_p1 ln_age bond_miss_prod_0 if year>=1996 & year<2018 & surface==0, level(90) timevar(ytreat) hdfe absorb(year permid) cluster(permid)
graph export "$MASTER\Tables\pa_sub_violations.pdf", replace


